* This file creates the necessary data used for Figure F3 and columns (4)-(6) of Table F1
* NOTE: You must run AppF_make_polystar_crowd first before running this file to create necessary data input
* The output of this file is a set of regression coefficients - het_dist50maj_data.dta, is saved in the folder - which is then used in AppF_analysis.do

use "$saveddata/data_complete_crowding.dta", clear

drop d1
encode d1_crowd, gen(d1)

*** Note - d1 here is the crowding group not diagnosis

* Collapse to correct level
		collapse (count) freq = aeattenddisp (sum) afreq = admit (mean) tcount icount admit ///
		mort death30 d_admit d_death cost30_aeem ///
		, by(bin d1)
		
		* Now do analysis...
		egen sumfreq = sum(freq), by(d1)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress


		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/15 {
		gen bin`x' = bin^`x'
		}
	
* local split "dist50_maj"	
merge m:1 d1 using "$saveddata/lookup_polystar_disagg_crowd.dta"
keep if _merge==3
drop _merge

/*
gen polystar_cost30_aeem = polystar_total_cost
rename polystar_death polystar_mort
gen polystar_death30 = polystar_mort
*/

egen allpats = sum(freq), by(d1)


save "$het/bunching_data_dist50_maj.dta", replace
*/


local df=100
local split "dist50_maj"
set more off
use "$het/bunching_data_`split'.dta", clear

** Waiting effects
mat wt = J(`df',6,.)


	forv x=1(1)`df'{
	preserve
	
	keep if d1==`x'
	
	su d1
	local y=r(mean)

	
	qui su polystar_wait
	local porder = r(mean)
	
	/*
	if d1==3{
local porder = 13
}
*/

	local start = 180
	local end = 300
	local tol= 0.01

	* waiting times - find end point
	local sumdiff = 999
	local t = 0
	while abs(`sumdiff')>`tol' & `t'<100{
		
		* drop variables created in loop
		cap drop pfreq_cf diff
		
		* update end point
		local end = `end' + 10	
		
		* compute counterfactual
		cap qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		while _rc>0 {
			local porder = `porder' - 1
			cap qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
			}
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
			
		* display end point
		di "End point: `end'"
		local t=`t'+1
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait
	
	* new code to compute wait time impacts
	su wait 
	local wait_obs = r(sum)
	su wait_cf 
	local wait_cf = r(sum)
	su freq
	local n = r(sum)
	
	su wait if bin>=`start' & bin<=`end'
	local wait_obs_aff = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local wait_cf_aff = r(sum)
	su freq if bin>=`start' & bin<=`end'
	local n_aff = r(sum)	
	
	local savingpp = round((`wait_obs' - `wait_cf')/`n',1)
	local savingpc = round((`wait_obs' - `wait_cf')/`wait_cf',0.01)
	local savingpp_int = round((`wait_obs_aff' - `wait_cf_aff')/`n_aff',1)
	local savingpc_int = round((`wait_obs_aff' - `wait_cf_aff')/`wait_cf_aff',0.01)
	
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% baseline): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in distorted window (% baseline): " `savingpc_int'

	/*
	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'
	*/
	
	* To work out admit/mort effects...
	gen freq_diff = freq_cf - freq 
	egen sumfreq = sum(freq)
	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	/*
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	gen w_pre = freq_cf/sumfreq_pre_cf
	gen w_post_diff = freq_diff/sumfreq_post_diff
	gen w = freq/sumfreq_pre
	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
*/
	* Generate weights
	gen weight = freq / sumfreq
	egen sumfreq_cf = sum(freq_cf)
	gen weight_cf = freq_cf / sumfreq_cf
	** repeat just for affected area
	gen weight_pre = freq / sumfreq_pre
	gen weight_pre_cf = freq_cf / sumfreq_pre_cf
	
	
	* Work out percentile of the counterfactual distribution where 240 is
	egen temp1 = sum(freq_cf) if bin<241
	egen temp2 = max(temp1)
	egen temp3 = sum(freq_cf)
	gen  temp4 = temp2/temp3
	su temp4
	local perc_cf = r(mean)
	
	
	if `t'<99{
	mat wt[`x',1] = `savingpp'
	mat wt[`x',2] = `savingpc'
	mat wt[`x',3] = `savingpp_int'
	mat wt[`x',4] = `savingpc_int'
	mat wt[`x',5] = `perc_cf'
	mat wt[`x',6] = `x'
	
	}
	
	if `t'>98{
	mat wt[`x',1] = .
	mat wt[`x',2] = .
	mat wt[`x',3] = .
	mat wt[`x',4] = .
	mat wt[`x',5] = .
	mat wt[`x',6] = `x'
	}
	
	
	restore
	di "Diagnosis `x'"

}

/*
}
}
*/
	
cap drop wt*
svmat wt
keep wt* allpats
rename wt6 d1
keep if d1!=.

label var wt1 "Total waiting time saving (mins per patient)"
label var wt2 "Total waiting time saving (% of c.f. total)"
label var wt3 "Total waiting time saving in distorted window (mins per patient)"
label var wt4  "Total waiting time saving in disorted window (% of c.f. total)"
label var wt5 "Percentile of counterfactual distribution = 240 minutes"
label var allpats "Total number of patients treated by trust"

save "$het/wait_`split'_auto.dta", replace



local df=100
local split "dist50_maj"

set more off
* Non-wait effects

use "$het/bunching_data_`split'.dta", clear

forval x=1/`df'{

preserve

keep if d1==`x'

* parameters

qui su polystar_wait
local porderwait = r(mean)

local start = 180
local end = 300
local tol=0.01

* waiting times - find end point
local sumdiff = 999
local t = 0
while abs(`sumdiff')>`tol' & `t'<100{
	
	cap drop pfreq_cf diff
	
	* Update end point
	local end = `end' + 10
	
	* compute counterfactual
	cap qui reg pfreq bin1-bin`porderwait' if bin<`start' | bin>`end'
	while _rc>0 {
			local porderwait = `porderwait' - 1
			cap qui reg pfreq bin1-bin`porderwait' if bin<`start' | bin>`end'
			}
	predict pfreq_cf, xb

	* check difference in mass
	gen diff = pfreq - pfreq_cf 
	qui su diff if bin>`start' | bin<`end'
	local sumdiff = r(sum)
		
	* display end point
	di "End point: `end'"
	local t=`t'+1
	}
drop diff

* waiting times - compute effects
cap qui reg freq bin1-bin`porderwait' if bin<`start' | bin>`end'
while _rc>0 {
			local porder = `porder' - 1
			cap qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
			}
predict freq_cf, xb
gen wait = freq*bin 

gen wait_cf = freq_cf*bin 
gen wait_diff = wait_cf - wait

su wait_diff
local saving = r(sum)
su freq
local patients = r(sum)/2
su wait_cf
local totalwait = r(sum)/2
su wait_diff if bin>=`start' & bin<=`end'
local saving_int = r(sum)
su wait_cf if bin>=`start' & bin<=`end'
local totalwait_int = r(sum)/2
su freq if bin>=`start' & bin<=`end'
local patients_int = r(sum)/2
local savingpp = round(`saving'/`patients',1)
local savingpc = round(`saving'/`totalwait',0.01)
local savingpp_int = round(`saving_int'/`patients_int',1)
local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
di "Total waiting time saving (mins per patient): " `savingpp'
di "Total waiting time saving (% of c.f. total): " `savingpc'
di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

* other outcomes - parameters and inputs to later calculations
gen freq_diff = freq_cf - freq 
egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
gen w_pre = freq_cf/sumfreq_pre_cf
gen w_post_diff = freq_diff/sumfreq_post_diff
gen w = freq/sumfreq_pre
gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
drop *cf

* other outcomes - compute counterfactuals and selection-only counterfactuals
foreach var of varlist d_admit d_death admit icount mort death30 cost30_aeem{

qui su polystar_`var'
local porder = r(mean)

/*
if d1_all==11 | d1_all==15 | d1_all==16{
local porder = 10
local tol=0.02
}
*/

	* counterfactual outcomes
	cap qui reg `var' bin1-bin`porder' if bin<`start' | bin>260
	while _rc>0 {
			local porder = `porder' - 1
			cap qui reg `var' bin1-bin`porder' if bin<`start' | bin>260
			}
	
	predict `var'_cf, xb
	
	* non-movers: pre-target counterfactual average outcome 
	qui egen `var'_pre_cf = sum(`var'_cf*w_pre)
		
	* movers: post-target counterfactual average outcome (weighted by mover proportions)
	qui egen `var'_post_cf = sum(`var'_cf*w_post_diff)

	* selection-only counterfactual
	qui gen `var'_selection = nonmover_pre*`var'_pre_cf + (1-nonmover_pre)*`var'_post_cf
	
	* observed outcome (averaged over pre period)
	qui egen `var'_observed = sum(`var'*w)
	}


	
* build results table
gen helper = 1
collapse (mean) nonmover_pre *_pre_cf *_post_cf *_selection *_observed, by(helper)

foreach i in d_admit d_death admit icount mort death30 cost30_aeem {
	
			if `t'<99{
				gen `i'_diff = `i'_observed - `i'_selection
				gen `i'_diff_pc = `i'_diff / `i'_observed 
			}
			
			if `t'>98{
				gen `i'_diff = .
				gen `i'_diff_pc = .
			}
				
		}

foreach i in pre_cf post_cf selection observed diff diff_pc {
	rename *_`i' `i'_* 
	}

reshape long pre_cf_ post_cf_ selection_ observed_ diff_ diff_pc_, i(helper) j(outcome) str
rename *_ *
drop helper
format %9.3fc nonmover pre post selection obs diff*
order outcome nonmover pre post selection observed diff diff_pc

* outsheet table
save "$het/selection_`split'_auto`x'.dta", replace
outsheet using "$het/selection_`split'_auto`x'.csv", comma replace

restore
}

* Put all the data together
set more off

local split "dist50_maj"
local df=100

set more off
use "$het/selection_`split'_auto1.dta", clear
gen d1=1

forval x=2/`df'{
append using "$het/selection_`split'_auto`x'.dta"
replace d1=`x' if d1==.
}

foreach var in nonmover_pre pre_cf post_cf selection observed diff diff_pc{
rename `var' `var'_
}

reshape wide nonmover_pre_ pre_cf_ post_cf_ selection_ observed_ diff_ diff_pc_, i(d1) j(outcome) string

merge 1:1 d1 using "$het/wait_`split'_auto.dta"

*drop if d1_all==11 | d1_all==15 | d1_all==16

* Keep those with >20,000 observations

*keep if medium==1
format %9.2fc diff_admit diff_d_admit diff*pc*

/*
* Graphs
label var diff_d_death_hat "Predicted mortality"
label var diff_death "Observed mortality"
label var diff_pc_d_death "Predicted mortality (% of cf)"
label var diff_pc_death "Observed mortality (% of cf)"
label var diff_d_admit_hat "Predicted admission"
label var diff_admit "Observed admission"
label var diff_pc_d_admit "Predicted admission (% of cf)"
label var diff_pc_admit "Observed admission (% of cf)"
*/

* Make the mortality effects REDUCTIONS in mortality
foreach var in diff_mort diff_pc_mort diff_pc_d_death diff_d_death diff_death30 diff_pc_death30 {
replace `var' = -`var'
}


set more off
label var wt5 "Percent of patients exiting within 240 minutes (counterfactual)"

label var wt1 "Reduction in waiting times (minutes)"
label var wt2 "Reduction in waiting times (% of cf)"

label var diff_mort "Observed mortality"
label var diff_d_death "Predicted mortality"
label var diff_mort "Observed admission"
label var diff_d_admit "Predicted admission"

gen bin = d1
replace bin = bin-50 if d1>50
gen maj = 0
replace maj = 1 if d1>50

label define maj 0 "Minors" 1 "Majors"
label values maj maj
label var bin "Inpatient crowding (50=most crowded)"

save "$saveddata/het_dist50maj_data.dta", replace
